Loading Libraries

library(eurostat)
library(knitr)
library(kableExtra)
library(ggplot2)
library(magrittr)
library(data.table)
library(here)
library(countrycode)
library(gganimate)
library(gapminder)
library(data.table)

Importing the Data

# Search the Eurostat catalogue for tables matching the indicator name,
# then display the first two columns of the hits.
query <- search_eurostat(
  pattern = "Gender pay gap in unadjusted form",
  type = "table",
  fixed = FALSE
)
query[, 1:2]
## # A tibble: 4 x 2
##   title                             code     
##   <chr>                             <chr>    
## 1 Gender pay gap in unadjusted form sdg_05_20
## 2 Gender pay gap in unadjusted form sdg_05_20
## 3 Gender pay gap in unadjusted form tesem180 
## 4 Gender pay gap in unadjusted form sdg_05_20
# Geo codes requested for the country-by-country series.
ct <- c(
  "AT", "BE", "BG", "CH", "CY", "CZ", "DE", "DK",
  "EE", "EL", "ES", "FI", "FR", "HR", "HU", "IE",
  "IS", "IT", "LI", "LT", "LU", "LV", "MT", "NL",
  "NO", "PL", "PT", "RO", "SE", "SI", "SK", "UK"
)

# Geo codes for the Portugal vs. EU-27 aggregate comparison.
PTandEU <- c("PT", "EU27_2020")

Variables dat and dat1

dat = each country listed in `ct` separately (EU member states plus a few other European countries)

dat1 = Portugal vs EU

# Download table sdg_05_20 restricted to the codes in `ct`, requesting a
# numeric `time` column, and preview the first two rows.
dat <- get_eurostat(
  id = "sdg_05_20",
  time_format = "num",
  filters = list(geo = ct)
)
dat[1:2, ]
## # A tibble: 2 x 5
##   unit  nace_r2 geo    time values
##   <chr> <chr>   <chr> <dbl>  <dbl>
## 1 PC    B-S_X_O AT     2002   NA  
## 2 PC    B-S_X_O AT     2006   25.5
# Download the same table restricted to Portugal and the EU-27 aggregate,
# requesting a numeric `time` column, and preview the first two rows.
dat1 <- get_eurostat(
  id = "sdg_05_20",
  time_format = "num",
  filters = list(geo = PTandEU)
)

dat1[1:2, ]
## # A tibble: 2 x 5
##   unit  nace_r2 geo        time values
##   <chr> <chr>   <chr>     <dbl>  <dbl>
## 1 PC    B-S_X_O EU27_2020  2002     NA
## 2 PC    B-S_X_O EU27_2020  2006     NA
# Replace the Eurostat codes in `dat` with their labels and preview three rows.
dat <- dat %>% label_eurostat()
dat[1:3, ]
## # A tibble: 3 x 5
##   unit     nace_r2                                           geo     time values
##   <chr>    <chr>                                             <chr>  <dbl>  <dbl>
## 1 Percent… Industry, construction and services (except publ… Austr…  2002   NA  
## 2 Percent… Industry, construction and services (except publ… Austr…  2006   25.5
## 3 Percent… Industry, construction and services (except publ… Austr…  2007   25.5
# Replace the Eurostat codes in `dat1` with their labels and preview three rows.
dat1 <- dat1 %>% label_eurostat()
dat1[1:3, ]
## # A tibble: 3 x 5
##   unit     nace_r2                              geo                  time values
##   <chr>    <chr>                                <chr>               <dbl>  <dbl>
## 1 Percent… Industry, construction and services… European Union - 2…  2002     NA
## 2 Percent… Industry, construction and services… European Union - 2…  2006     NA
## 3 Percent… Industry, construction and services… European Union - 2…  2007     NA

Clean the data (remove columns)

# Convert each table to a data.table by reference, then delete the `unit` and
# `nace_r2` columns in place.
setDT(dat)
dat[, c("unit", "nace_r2") := NULL]

setDT(dat1)
dat1[, c("unit", "nace_r2") := NULL]

Evolution of Gender Pay Gap in EU, 2002-2018

(there is no available data for 2019)

Line graph

# Download the series again, this time without `time_format = "num"` and
# without labelling. This replaces the labelled, trimmed `dat` built earlier:
# the yearly sections further down filter `time` against date strings and
# match `geo` against the raw "PT" code.
# NOTE(review): relies on get_eurostat() returning `time` as dates by default.
dat <- get_eurostat(id = "sdg_05_20", filters = list(geo = ct))

library(ggplot2)
library(dplyr)

# Static line graph: one line per country, legend replaced by inline labels.
ggplot(dat, aes(x = time, y = values, color = geo, label = geo)) +
  geom_line(alpha = .5) +
  # Label each line once, at that country's latest time point.
  geom_text(
    data = dat %>% group_by(geo) %>% filter(time == max(time)),
    size = 2.6
  ) +
  theme(legend.position = "none") +
  labs(title = "Gender Pay Gap, 2002-2018", x = "Year", y = "%")

The same line graph, but animated

# Animated line graph: lines and points are revealed progressively along
# `time`. labs(), theme() and transition_reveal() are each specified once,
# because a later call overrides an earlier one.
ggplot(dat, aes(time, values, color = geo, label = geo)) +
  geom_line(alpha = .5) +
  # Country labels, drawn from each country's latest time point.
  geom_text(
    data = dat %>% group_by(geo) %>% filter(time == max(time)),
    size = 2.6
  ) +
  geom_point() +
  scale_color_viridis_d() +
  theme(legend.position = "top") +
  labs(title = "Gender Pay Gap, 2002-2018", x = "Year", y = "%") +
  transition_reveal(time)

Portugal vs. European Union, 2006-2018

Portugal has no available data until 2006 and the EU only has available data from 2010 on (again, there is no available data for 2019)

line graph

# Labelled geo names of the two series compared in this section.
pteu <- c("European Union - 27 countries (from 2020)", "Portugal")

library(ggplot2)
library(dplyr)

# Line graph of Portugal against the EU aggregate; the legend is replaced by
# inline labels.
ggplot(dat1, aes(x = time, y = values, color = geo, label = geo)) +
  geom_line(alpha = .5) +
  # Label each line once, at that series' latest time point.
  geom_text(
    data = dat1 %>% group_by(geo) %>% filter(time == max(time)),
    size = 2.6
  ) +
  theme(legend.position = "none") +
  # The title matches the section heading: the data stop at 2018.
  labs(title = "Gender Pay Gap, 2006-2018", x = "Year", y = "%")

Then vs Now

Now we compare the first and the latest years with available data [2002 vs. 2018] individually.

Gender Pay Gap in 2002

Bar Chart

# Keep only the 2002 observations.
dat_2002 <- filter(dat, time == "2002-01-01")

# Bar chart of the 2002 values, with countries ordered by their value.
ggplot(dat_2002, aes(x = reorder(geo, values), y = values)) +
  geom_col(fill = "grey80", color = "white") +
  labs(title = "Gender Pay Gap in 2002", x = NULL, y = "%") +
  theme(axis.text.x = element_text(size = 6))

Map

# Attach the 2002 values to the country-level (NUTS 0) geometries. The join
# key is named explicitly so the match does not depend on dplyr guessing the
# shared columns. `cat` bins the values into four classes for the map legend.
mapdata <- get_eurostat_geospatial(nuts_level = 0) %>%
  right_join(dat_2002, by = "geo") %>%
  mutate(cat = cut_to_classes(values, n = 4, decimals = 1))
head(select(mapdata, geo, values, cat), 3)
## Simple feature collection with 3 features and 3 fields
## geometry type:  MULTIPOLYGON
## dimension:      XY
## bbox:           xmin: 5.95607 ymin: 34.56908 xmax: 34.56859 ymax: 47.80401
## geographic CRS: WGS 84
##   geo values          cat                       geometry
## 1  BG   18.9 16.9 ~< 22.3 MULTIPOLYGON (((22.99717 43...
## 2  CH     NA      No data MULTIPOLYGON (((8.61383 47....
## 3  CY   22.5 22.3 ~< 27.7 MULTIPOLYGON (((33.75237 34...
# Choropleth of the binned values in `mapdata`, limited to a fixed
# longitude/latitude window.
ggplot(mapdata, aes(fill = cat)) +
  geom_sf(alpha = .6, color = alpha("white", 1/3)) +
  scale_fill_brewer(palette = "RdYlBu") +
  xlim(c(-12, 44)) +
  ylim(c(35, 70)) +
  labs(
    title = "Gender Pay Gap in 2002",
    subtitle = "% of average gross hourly earnings of men",
    fill = "%"
  )

Only a few countries have available data for 2002; Portugal only has available data from 2006 on.

Let’s try using 2006 as a starting point

Bar chart

library(gghighlight)

# Keep only the 2006 observations.
dat_2006 <- filter(dat, time == "2006-01-01")

# Bar chart of the 2006 values, ordered by value, with Portugal highlighted.
ggplot(dat_2006, aes(x = reorder(geo, values), y = values)) +
  geom_col(fill = "tomato", color = "white") +
  gghighlight(geo == "PT") +
  labs(title = "Gender Pay Gap in 2006", x = NULL, y = "%") +
  theme(axis.text.x = element_text(size = 6))

Portugal is highlighted for analysis and comparison

Map

# Attach the 2006 values to the country-level (NUTS 0) geometries. The join
# key is named explicitly so the match does not depend on dplyr guessing the
# shared columns. `cat` bins the values into four classes for the map legend.
mapdata <- get_eurostat_geospatial(nuts_level = 0) %>%
  right_join(dat_2006, by = "geo") %>%
  mutate(cat = cut_to_classes(values, n = 4, decimals = 1))
head(select(mapdata, geo, values, cat), 3)
## Simple feature collection with 3 features and 3 fields
## geometry type:  MULTIPOLYGON
## dimension:      XY
## bbox:           xmin: 5.95607 ymin: 34.56908 xmax: 34.56859 ymax: 47.80401
## geographic CRS: WGS 84
##   geo values          cat                       geometry
## 1  BG   12.4 10.8 ~< 17.1 MULTIPOLYGON (((22.99717 43...
## 2  CH   18.6 17.1 ~< 23.4 MULTIPOLYGON (((8.61383 47....
## 3  CY   21.8 17.1 ~< 23.4 MULTIPOLYGON (((33.75237 34...
# Choropleth of the binned values in `mapdata`, limited to a fixed
# longitude/latitude window.
ggplot(mapdata, aes(fill = cat)) +
  geom_sf(alpha = .6, color = alpha("white", 1/3)) +
  scale_fill_brewer(palette = "RdYlBu") +
  xlim(c(-12, 44)) +
  ylim(c(35, 70)) +
  labs(
    title = "Gender Pay Gap in 2006",
    subtitle = "% of average gross hourly earnings of men",
    fill = "%"
  )

Gender Pay Gap in 2018

(again, there is no available data for 2019)

Bar Chart

# Keep only the 2018 observations.
dat_2018 <- filter(dat, time == "2018-01-01")

# Bar chart of the 2018 values, ordered by value, with Portugal highlighted.
ggplot(dat_2018, aes(x = reorder(geo, values), y = values)) +
  geom_col(fill = "tomato", color = "white") +
  gghighlight(geo == "PT") +
  labs(title = "Gender Pay Gap in 2018", x = NULL, y = "%") +
  theme(axis.text.x = element_text(size = 6))

Map

# Attach the 2018 values to the country-level (NUTS 0) geometries. The join
# key is named explicitly so the match does not depend on dplyr guessing the
# shared columns. `cat` bins the values into four classes for the map legend.
mapdata <- get_eurostat_geospatial(nuts_level = 0) %>%
  right_join(dat_2018, by = "geo") %>%
  mutate(cat = cut_to_classes(values, n = 4, decimals = 1))
head(select(mapdata, geo, values, cat), 3)
## Simple feature collection with 3 features and 3 fields
## geometry type:  MULTIPOLYGON
## dimension:      XY
## bbox:           xmin: 5.95607 ymin: 34.56908 xmax: 34.56859 ymax: 47.80401
## geographic CRS: WGS 84
##   geo values          cat                       geometry
## 1  BG   13.9 11.6 ~< 16.7 MULTIPOLYGON (((22.99717 43...
## 2  CH     NA      No data MULTIPOLYGON (((8.61383 47....
## 3  CY   10.4  6.5 ~< 11.6 MULTIPOLYGON (((33.75237 34...
# Choropleth of the binned values in `mapdata`, limited to a fixed
# longitude/latitude window.
ggplot(mapdata, aes(fill = cat)) +
  geom_sf(alpha = .6, color = alpha("white", 1/3)) +
  scale_fill_brewer(palette = "RdYlBu") +
  xlim(c(-12, 44)) +
  ylim(c(35, 70)) +
  labs(
    title = "Gender Pay Gap in 2018",
    subtitle = "% of average gross hourly earnings of men",
    fill = "%"
  )

ANIMATIONS FROM 2002 TO 2018

Bar chart

# Animated bar chart: one frame per point in `time`, with the current frame
# time shown in the title. The title is set once (a fixed-year title would be
# overridden by the frame title anyway), and no `fill` label is given because
# no fill aesthetic is mapped.
ggplot(dat, aes(x = reorder(geo, values), y = values)) +
  geom_col(color = "white", fill = "grey80") +
  theme(axis.text.x = element_text(size = 6)) +
  labs(
    title = "Year: {frame_time}",
    subtitle = "% of average gross hourly earnings of men"
  ) +
  transition_time(time) +
  ease_aes()